R code for lecture 21

### HW 8 solutions ###
 
stirrett <- read.delim('ecol 563/Stirrett.txt')
 
# create raw data
trt1 <- rep(stirrett$Count, stirrett$Trt1)
trt2 <- rep(stirrett$Count, stirrett$Trt2)
trt3 <- rep(stirrett$Count, stirrett$Trt3)
trt4 <- rep(stirrett$Count, stirrett$Trt4)
 
# assemble results and add a treatment variable
borer <- data.frame(count=c(trt1, trt2, trt3, trt4), treatment=rep(1:4, c(length(trt1), length(trt2), length(trt3), length(trt4))))
 
# add the two factors that define the treatments
borer$early <- factor(ifelse(borer$treatment %in% c(3,4),'present','absent'))
borer$late <- factor(ifelse(borer$treatment %in% c(2,4),'present','absent'))
 
# fit interaction model
library(MASS)
out.NB1 <- glm.nb(count~early*late, data=borer
# fit one-way design
out.NB1a <- glm.nb(count~factor(treatment), data=borer)
# two-factor interaction model and one-way design are equivalent
AIC(out.NB1,out.NB1a)
sapply(list(out.NB1,out.NB1a),logLik)
 
# sequential analysis of deviance table
anova(out.NB1)
 
# refit main effects model
out.NB2 <- glm.nb(count~early, data=borer)
anova(out.NB2)
summary(out.NB2)
 
# mean count of early treatment is 40% that of no treatment
exp(coef(out.NB1)[2])
 
#### goodness of fit #####
 
# create data set of counts
junk<-data.frame(Count=0:26)
# merge with tabulated data
borer.table <- merge(stirrett, junk, all=T)
# change missing values to zero
temp <- sapply(2:5, function(x) ifelse(is.na(borer.table[,x]), 0, borer.table[,x]))
# assign result to data frame
borer.table[,2:5] <- temp
borer.table
 
# create the counts vector, stack the frequency columns, add a treatment column 
final.borer <- data.frame(count=rep(borer.table$Count,4), freq=unlist(borer.table[,2:5]), treatment=rep(1:4, each=nrow(borer.table)))
dim(final.borer)
 
# create the early and late factors
final.borer$early <- factor(ifelse(final.borer$treatment %in% c(3,4), 'present', 'absent'))
final.borer$late <- factor(ifelse(final.borer$treatment %in% c(2,4), 'present', 'absent'))
 
# add predicted treatment mean to the data frame
final.borer$mu <- predict(out.NB2, type='response', newdata=data.frame(early=final.borer$early))
 
# calculate negative binomial probabilities
final.borer$p <- dnbinom(final.borer$count ,mu=final.borer$mu, size=out.NB2$theta)
# probabilities do not sum to one
tapply(final.borer$p, final.borer$treatment, sum)
# add tail probability to P(X = 26) entry
final.borer$p <- final.borer$p + ifelse(final.borer$count==26, 1-pnbinom(26,mu=final.borer$mu, size=out.NB2$theta), 0)
tapply(final.borer$p,final.borer$treatment,sum)
final.borer[1:12,]
 
# count number of observations in each treatment
tapply(final.borer$freq, final.borer$treatment, sum)
n <- tapply(final.borer$freq, final.borer$treatment, sum)
 
# calculate expected counts
final.borer$exp <- final.borer$p*n[final.borer$treatment]
 
# graphical goodness of fit
library(lattice)
xyplot(freq~count|early+late, data=final.borer, ylab='frequency', panel=function(x,y,subscripts) {
panel.xyplot(x, y, type='h', lwd=8, col='grey',lineend=1)
panel.xyplot(x, final.borer$exp[subscripts], pch=16, cex=.7, type='o', col=1)})
 
# improve labeling of the panels
xyplot(freq~count|factor(early, levels=levels(early), labels=paste("Early = ", levels(early), sep='')) + factor(late, levels=levels(late), labels=paste("Late = ", levels(late),sep='')), data=final.borer, ylab='frequency', panel=function(x, y, subscripts) {
 panel.xyplot(x, y, type='h', lwd=8, col='grey',lineend=1)
 panel.xyplot(x, final.borer$exp[subscripts], pch=16, cex=.7, type='o', col=1)})
 
# place panels on top and on the left
library(latticeExtra)
xyplot(freq~count|factor(early, levels=levels(early), labels=paste("Early = ", levels(early),sep='')) + factor(late, levels=levels(late), labels=paste("Late = ", levels(late),sep='')), data=final.borer, ylab='frequency', panel=function(x, y, subscripts) {
 panel.xyplot(x, y, type='h', lwd=8, col='grey', lineend=1)
 panel.xyplot(x, final.borer$exp[subscripts], pch=16, cex=.7, type='o', col=1)}) -> mygraph
useOuterStrips(mygraph)
 
# parametric bootstrap goodness of fit
 
borer.p <- split(final.borer$p, final.borer$treatmen)
 myfunc <- function() {
out.obs <- as.vector(sapply(1:4, function(x) rmultinom(1, size=n[x], prob=borer.p[[x]])))
sum((out.obs-final.borer$exp)^2/final.borer$exp)
}
 
set.seed(100)
sim.data <- replicate(9999, myfunc())
actual <- sum((final.borer$freq-final.borer$exp)^2/final.borer$exp)
pearson <- c(sim.data, actual)
pval <- sum(pearson>=actual)/length(pearson)
pval
 


########## New material #############
 
### goodness of fit for NB model with continuous predictors ####
 
gala <- read.table('ecol 563/galapagos.txt', header=T)
out.NB2 <- glm.nb(Species~log(Area), data=gala)
 
# determine range of observed species counts
range(gala$Species)
 
# create probability vectors for each observation
out.p <- lapply(1:29, function(x) dnbinom(0:475, mu=fitted(out.NB2)[x], size=out.NB2$theta))
 
# determine the largest observation for which probability exceeds 5e-6
sapply(1:29, function(x) max((0:475)[out.p[[x]]>5e-6]))
 
# store this largest count as ux
gala$ux <- sapply(1:29, function(x) max((0:475)[out.p[[x]]>5e-6]))
# set lx to be zero
gala$lx <- rep(0,29)
 
# obtain maxiumum calculated probability for each island
gala$uy <- sapply(out.p, max)
# minimum probability
gala$ly <- rep(0,29)
 
# mean for each island
gala$mu<-fitted(out.NB2)
 
# observed probability according to the model
gala$z <- dnbinom(gala$Species, mu=fitted(out.NB2), size=out.NB2$theta)
 
# prepanel function to set limits for each graph
prepanel.ci2 <- function(x, y, ly, lx, uy, ux, subscripts, ...) {
list(ylim=range(y, uy[subscripts], ly[subscripts], finite=T), xlim=range(x, ux[subscripts], lx[subscripts], finite=T))}
 
# plot distributions for each island
xyplot(z~Species|Island, data=gala, ux=gala$ux, lx=gala$lx, uy=gala$uy, ly=gala$ly,
 prepanel=prepanel.ci2, panel=function(x,y,subscripts){
 panel.xyplot(0:475, dnbinom(0:475, mu=gala$mu[subscripts], size=out.NB2$theta), type='h', col='grey')
 panel.abline(v=x, col=2, lty=2)},
 scale=list(x='free', y='free'))
 
#uppet-tailed p-values
upper.p <- 1-pnbinom(gala$Species-1, mu=gala$mu, size=out.NB2$theta)
# number of significant p-values
sum(upper.p<.05)
# number of significant results expected by chance
.05*29
 
# lower-tailed p-values
lower.p <- pnbinom(gala$Species, mu=gala$mu, size=out.NB2$theta)
sum(lower.p<.05)
lower.p
R Code for Lecture 21 (Monday, November 5, 2012)